;;########################################################################
;; misdsd2.lsp
;; Copyright (c) 1998 by Pedro Valero (valerop@uv.es)
;; Code for Missing Data Analysis and Imputation. 
;;Computes EM estimation of correlations. Imputes data without and with a 
;;normal random error added
;;########################################################################

 

;;
;;Functions for preparing data for computing the EM algorithm          
;;
(defun dummy (var)
  "Args: (VAR) Changes the nil in a variable for zeros and for ones if not nil"
  (if (equal var nil) 0 1))

(defun data-matrix-missing (data)
  "Args: (DATA) Changes the nil in a variable for zeros and for ones if not nil"
  (map-elements 'dummy data))


(defun patterns-missing (data)
  "Args: (DATA) Outputs the different patterns of missing data in DATA."
  (remove-duplicates 
   (row-list (data-matrix-missing data)) :test 'equalp)
  )

;<<<<<<< MISSD2.LSP
(defun cases-in-missing-patterns (data &key (patterns nil))
;=======
;(defun cases-in-missing-patterns (data)
;>>>>>>> 1.3
  "Args: (DATA) Outputs cases in each of the patterns of missing data obtained with the function patterns-missing"
 (let* (
        (data data)
        (patterns (if patterns patterns (patterns-missing data)))
        (number-of-patterns (length patterns))
        (matrix-missing (data-matrix-missing data))
        (rw-list-miss (row-list matrix-missing))
        (n (array-dimension matrix-missing 0))
        (cases-with-patterns nil)
        )
   (dotimes (i number-of-patterns)
            (setf cases-with-patterns 
                 (append cases-with-patterns 
                          (list (which 
                                 (mapcar #'equalp 
                                         rw-list-miss
                                         (repeat (list (select patterns i))
                                                 n))))
                          ))
            )
   (list cases-with-patterns (mapcar #'length cases-with-patterns))))
 



(defun Observed-in-missing-pattern (patterns i)
  "Args: (PATTERNS) PATTERNS are patterns in missing data. I is the pattern to be explored to find which values are present i.e. non-missing. Returns the index of present values."
  (let 
    (
     (patterns patterns)
     (i i)
     (O nil)
     )
    (setf O (which (map-elements #'equalp 1 (select patterns i)))) 
    O))

(defun observed-in-missing-pattern-list (patterns)
  (let* (
         (patterns patterns)
         (i (length patterns))
         (observed-in-missing-pattern-list nil)
         )

    (dotimes (i (length patterns))
             (setf observed-in-missing-pattern-list 
                   (append observed-in-missing-pattern-list 
                           (list (observed-in-missing-pattern patterns i)))
                   ))
    observed-in-missing-pattern-list))

(defun Missing-in-missing-pattern-list (patterns)
  (let* (
         (patterns patterns)
         (i (length patterns))
         (missing-in-missing-pattern-list nil)
         )

    (dotimes (i (length patterns))
             (setf missing-in-missing-pattern-list 
                   (append missing-in-missing-pattern-list 
                           (list (missing-in-missing-pattern patterns i)))
                   ))
    missing-in-missing-pattern-list))
  
(defun Missing-in-missing-pattern (patterns i)
"Args: (PATTERNS) PATTERNS are patterns in missing data. I is the pattern to be explored to find which values are missing. Returns the index of missing values."
  (let 
    (
     (patterns patterns)
     (i i)
     (O nil)
     )
    (setf O (which (map-elements #'equalp 0 (select patterns i)))) 
    O))


(defun Tobs (data &key (patterns-missing nil))
  "Args: (DATA) A matrix of data. Computes preliminary information to be used by the EM algorithm."
  (let* 
    (
     
     (data data)
     (num-variables (length (column-list data)))
     (Tobs (make-array (list (+ 1 num-variables) 
                             (+ 1 num-variables))
                       :initial-element 0))  
     (patterns (if patterns-missing patterns-missing (patterns-missing data)))
     (cases-in-patterns (cases-in-missing-patterns data :patterns patterns))  
     (rows-in-patterns (select cases-in-patterns 0))
     (n-patterns (select cases-in-patterns 1))
     (rows-pattern-s nil)
     (data-pattern-s nil)
     (n-s nil)
     (suma-s nil)
     (mult-s nil)
     )

    (dotimes (s (length n-patterns))
            
             (setf variables-observed (observed-in-missing-pattern patterns s)) 
             (setf data-pattern-s (select data (select rows-in-patterns s)
                                variables-observed))
             (setf n-s (array-dimension data-pattern-s 0))   
             (setf suma-s (mapcar 'sum (column-list data-pattern-s)))
             
             (setf mult-s (cross-product data-pattern-s))
             (setf Tobs-s (make-array (list (+ 1 num-variables) 
                                            (+ 1 num-variables))
                                      :initial-element 0))  
            (when variables-observed 
             (setf (select Tobs-s 0 (+ 1 variables-observed) ) 
                   (make-array 
                    (list 1  (length suma-s)) :initial-contents suma-s))
             
             (setf (select Tobs-s  (+ 1 variables-observed) 0) 
                   (make-array 
                    (list   (length suma-s) 1) :initial-contents suma-s))

             (setf (select Tobs-s 0 0 ) (length (select rows-in-patterns s)))
          
             (setf (select Tobs-s (+ 1 variables-observed)
                           (+ 1 variables-observed)) 
                   (make-array 
                    (array-dimensions mult-s)
                    :initial-contents mult-s)))
             (setf tobs (+ tobs tobs-s))
             )
    
    (list tobs rows-in-patterns n-patterns data patterns))
  )


(defun mean-var-uniwise (data)
  "Args: (DATA) Computes means and variances for a matrix with missing data."
  (let (
        (uniwisedata nil)     
        (mediauniw nil) 
        (varianceuniw nil)     
        (data data)        
        )
    (dotimes (i (select (array-dimensions data) 1))
             (setf uniwisedata (non-missing (col data i)))
             (setf mediauniw (append mediauniw (list (mean uniwisedata))))
             (setf varianceuniw (append varianceuniw (list (variance uniwisedata))))
             )                               
    (list mediauniw varianceuniw) )
  )
  

;;
;;The core of the code for EM. This owes much to Schaffer (1997). Analysis of incomplete multivariate data. Chapman & Hall          
;;
(defun e-m (output-tobs iter  difference &optional  initial-matrix)
  "Args: (OUTPUT-TOBS ITER DIFFERENCE) OUTPUT-TOBS. Output produced by the function TOBS. ITER. Maximun number of iterations. DIFFERENCE. Minimun difference with previous matrix of covariance for stopping the iterations. Initial matrix to start with the iterations"
  (let* (
         (iter iter)
         (difference difference)
         (t-matrix nil)
         (output-tobs output-tobs)             
         (s (length (select output-tobs 1)))
         (p (- (select  (array-dimensions (first output-tobs)) 0) 1))
         (Parametters              
          (make-array 
           (array-dimensions (first output-tobs))
           :initial-element 0))
         (prev-parametters      
          (if initial-matrix
              (setf (select 
                     parametters
                     (iseq 1 p) (iseq 1 p))
                    initial-matrix)
              (make-array 
               (array-dimensions (first output-tobs))
               :initial-element 0)))
         (t-obs (first output-tobs)) 
         (rows-in-patterns (select output-tobs 1))   
         (c (make-array (list p 1)
                        :initial-element 0))
         (n-cases (array-dimension (select output-tobs 3) 0))
         (data (make-array  (array-dimensions (select output-tobs 3))
                         :initial-contents (select output-tobs 3)))           
         (means-uniwise
          (make-array  (list 1 p)
                       :initial-contents 
                       (mapcar #'(lambda (var)
                                   (mean (non-missing var)))
                               (column-list data))))
         (variances-uniwise (make-array  (list 1 p)
                         :initial-contents            
                                         (mapcar #'(lambda (var)
                                   (variance (non-missing var)))
                               (column-list data))))

         (r (make-array (list s p) :initial-contents (select output-tobs 4)))
         (patterns (select output-tobs 4));esto deberia quitarlo esta repetido
         (l 0)
         (elementwise-convergence nil)         
         (missing-in-missing-pattern-list (Missing-in-missing-pattern-list patterns))
         (observed-in-missing-pattern-list (observed-in-missing-pattern-list patterns))
         (loglikelihood-em nil)
         (log-difs nil)
         (iter-difs nil)
         (initial-likelihood nil)
         (ct)
         )

;<<<<<<< MISSD2.LSP
                                  
    (setf *missing-report-window* 
          (report-header "Missing Data Imputation" 
                         :page t :scroll t :location '(250 100)))
    (display-string 
              (format nil "Missing Data Imputation~%Copyright (c) 1998-2000 Pedro Valero (valerop@uv.es)~2%Imputation is an iterative process.~%The iteration history appears below.") *missing-report-window*)
;>>>>>>> 1.3
    
    ;Construccion de la matriz de parametros. Utiliza las medias uniwise y las correlaciones a cero
    (setf (select parametters 0 0) -1)
    (setf (select parametters 0 (iseq 1 p)) (select means-uniwise 0 (iseq 0 (- p 1))))

    ;Tierney p. 164 tiene una funcion para esto   
    (setf (select parametters (iseq 1 p) 0) 
          (transpose (select means-uniwise 0 (iseq 0 (- p 1)))))
    (when (not initial-matrix)
          (dotimes (i p)  
                   (setf (select parametters (+ 1 i) (+ 1 i)) 
                         (select variances-uniwise 0 i))
                   ))

    (setf initial-likelihood
          (loglikelihood p s r patterns parametters 
                         rows-in-patterns data
                         observed-in-missing-pattern-list))

    (if (complexp initial-likelihood)                
          (add-text *missing-report-window*  
                    (format nil "~2%Initial loglikelihood  ~15,6f " 
                            "null")
                    :scroll t)
        (add-text *missing-report-window*  
                  (format nil "~2%Initial loglikelihood  ~15,6f " 
                          initial-likelihood)
                  :scroll t)
        )

    
    ;Empieza el algoritmo EM
  
    (dotimes (z iter)

             (setf t-matrix (copy-array t-obs))
             (setf prev-parametters  (copy-array parametters))
             (dotimes (i s) 
                      
                      (dotimes (j p)
                               
                               (if (and (equalp (aref r i j) 1) 
                                        (> (aref parametters (+ 1 j) (+ 1 j)) 0))
                       
                                   (setf parametters 
                                         (select 
                                          (schafer-sweep-operator 
                                           parametters (list (+ 1 j))) 0))
                                   )
                           
                               (if  (and (equalp (aref r i j) 0) 
                                         (< (aref parametters (+ 1 j) (+ 1 j)) 0))                              
                                    (setf parametters 
                                          (select  
                                           (reverse-schafer-sweep-operator 
                                            parametters (list (+ 1 j))) 
                                           0))
                                    ))
                             
                      
   (if (and (select missing-in-missing-pattern-list i)
            (select observed-in-missing-pattern-list i)) ;this avoids the pattern without missing or all missing enters in the loop
                   
       (dolist (m (select rows-in-patterns i))                  
               (setf (select c 
                             (select 
                              missing-in-missing-pattern-list 
                              i) 0)
                     (select parametters  
                             (+ 1 
                                (select missing-in-missing-pattern-list 
                                        i)) 0))
                              
                               
   (setf (select c 
                 (select missing-in-missing-pattern-list i) 0) 
         (map-elements #'+ 
                       (select c 
                               (select
                                missing-in-missing-pattern-list 
                                i) 0)
                       (matmult 
                        (select data m 
                                (select 
                                 observed-in-missing-pattern-list 
                                 i))
                        (select 
                         parametters 
                         (+ 1  (select
                                observed-in-missing-pattern-list
                                i))
                         (+ 1 (select 
                               missing-in-missing-pattern-list i)))

                        )))

                              
     (setf (select t-matrix 0 
                   (+ 1 (select missing-in-missing-pattern-list i))) 
           (map-elements #'+ (select t-matrix 0 
                                     (+ 1 (select                                                                       missing-in-missing-pattern-list i))) 
                         (select c 
                                 (select 
                                  missing-in-missing-pattern-list 
                                  i) 0)))
                                  
                          
(setf (select t-matrix (+ 1 (select missing-in-missing-pattern-list i)) 
              (map-elements #'+ 1 (select observed-in-missing-pattern-list i))) 
      (map-elements #'+ (select t-matrix 
                                (+ 1 (select missing-in-missing-pattern-list i)) 
                                (+ 1 (select observed-in-missing-pattern-list i))) 
                    (matmult (select c (select missing-in-missing-pattern-list i) 0) 
                             (select data m (select observed-in-missing-pattern-list i)) 
                             )))
                              
   (setf (select t-matrix 
                 (+ 1 (select missing-in-missing-pattern-list i)) 
                 (+ 1 (select missing-in-missing-pattern-list i))) 
         (map-elements #'+ (select t-matrix
                                   (+ 1 (select missing-in-missing-pattern-list i)) 
                                   (+ 1 (select missing-in-missing-pattern-list i)))
                       (select parametters 
                               (+ 1 (select missing-in-missing-pattern-list i))
                               (+ 1 (select missing-in-missing-pattern-list i)))
                       ;las covarianzas calculadas por la frmula normal
               ;y por el algoritmo EM varan en este punto
                       (matmult 
                        (select c 
                                (select missing-in-missing-pattern-list i) 0) 
                        (transpose (select c 
                                           (select missing-in-missing-pattern-list i) 0)))))
                                  ))
                      ;; This makes the matrix symmetric
      (if (select missing-in-missing-pattern-list i)
          (setf (select t-matrix 
               (+ 1 (select missing-in-missing-pattern-list i))  0)
                (transpose (select t-matrix 
                                   0 (+ 1 (select missing-in-missing-pattern-list i))))
                ))
                          
                      (if (select missing-in-missing-pattern-list i)
                          (setf (select t-matrix 
                                        (+ 1 (select observed-in-missing-pattern-list i)) 
                                        (+ 1 (select missing-in-missing-pattern-list i)))
                                (transpose 
                                 (select t-matrix (+ 1 (select 
                                            missing-in-missing-pattern-list i))
                     (+ 1 (select observed-in-missing-pattern-list i)))))
                          ))
                           
           
            
             ;Aqu calcula los parmetros  
           
             (setf parametters (first (schafer-sweep-operator 
                                       (matmult (/ 1 n-cases) t-matrix) (list 0))))
             
            
             
             (setf loglikelihood-em 
                   (loglikelihood p s r patterns parametters rows-in-patterns data observed-in-missing-pattern-list ))
             ; Esto  calcula la diferencia entre matrices de parametros
            

             (setf difparametters 
                   (- parametters  
                      prev-parametters)) 
             (setf log-difs (append log-difs (list loglikelihood-em)))
             (setf iter-difs (append iter-difs (list (sum (abs difparametters)))))

             (when (< z 10)
                   (setf elementwise-convergence
                         (append  elementwise-convergence 
                                  (list difparametters) 
                                  )))
             (when (>= z 10)
                   (pop elementwise-convergence)
                   (setf elementwise-convergence
                         (append  elementwise-convergence
                                  (list difparametters) 
                         )))
           ; toda esta parte calcula la detencion del calculo
             (setf differencenow (sum (abs difparametters)))
            (if (equal z 0)
                (display-string
                      (format nil "~2%~12a ~12a ~12a" "Iteration" "Difference" "Loglikelihood") *missing-report-window*))

             (when (complexp loglikelihood-em) (setf loglikelihood-em "null"))
             (display-string 
                       (format nil "~%~8d ~14,6f ~14,6f" z  differencenow loglikelihood-em) 
                       *missing-report-window*)
             (send *missing-report-window* :fit-window-to-text)
             (if (< differencenow difference) 
                 (terpri))
             (when (< differencenow difference) 
                   
                   (display-string 
                    (format nil "~%Iterations stopped because improvement < ~,3g" difference)
                    *missing-report-window*)
                   (display-string 
                    (format nil "~%Use Model Menu to report and visualize results")
                    *missing-report-window*)
                   (display-string 
                    (format nil "~%and to create data which include imputed missing values.")
                    *missing-report-window*)
                   (send *missing-report-window* :fit-window-to-text)
                   )
             (if (< differencenow difference)  (return))
             (when (equal z (1- (send self :iterations))) 
                   (display-string 
                    (format nil "~%Warning: the EM algorithm did not converge in ~,3g iterations."  (send self :iterations))
                    *missing-report-window*)
                   (display-string 
                    (format nil "~%This may happen because the matrix is nearly singular.")
                    *missing-report-window*))
             )
    (send *missing-report-window* :top-most t)
    (send *missing-report-window* :redraw)
    (setf means-em (select parametters 0 (iseq 1 (- (array-dimension parametters 0) 1))))
    (setf matrix-em (select parametters  (iseq 1 (- (array-dimension parametters 0) 1))
                            (iseq 1 (- (array-dimension parametters 0) 1))
                            ))
;<<<<<<< MISSD2.LSP
    (list means-em matrix-em parametters output-tobs elementwise-convergence log-difs iter-difs rows-in-patterns) ;elementwise-convergence
;=======
;    (list means-em matrix-em parametters output-tobs elementwise-convergence log-difs iter-difs) ;elementwise-convergence
;>>>>>>> 1.3
    
    )
  )

; The definition of Schaffer of the sweep operator is different of Tierneys
; To simplify things It is programmed here

(defun reverse-schafer-sweep-operator (matrix row)
"Args: (MATRIX ROW) MATRIX: A matrix of variances-covariances. ROW: Row to be reverse-sweep for. This is the reverse-sweep operator as defined in Schaffer 1997."
  (let* 
    (
     (matrix matrix)
     (row row)
     (dimensions (select (array-dimensions matrix) 0))     
     )
    (setf output-matrix (select (sweep-operator matrix row) 0))
    (setf (select output-matrix row row)  (* -1 (select output-matrix row row)))

    (dotimes (j dimensions)
             (if (not (equal j (first row)))                       
                 (setf (select output-matrix  j (first row) ) 
                       (* -1 (select output-matrix j (first row)  )))))
    (list output-matrix row))
  )

(defun schafer-sweep-operator (matrix row)
"Args:(MATRIX ROW) MATRIX: A matrix of variances-covariances. ROW: to be sweep for. This is the reverse-sweep operator as defined in Schaffer 1997."
  (let* 
    (
     (matrix matrix)
     (row row)
     (dimensions (select (array-dimensions matrix) 0))     
     )
    
    (setf output-matrix (select (sweep-operator matrix row) 0))
    (setf (select output-matrix row row)  (* -1 (select output-matrix row row)))
    (dotimes (j dimensions)
             (if (not (equal j (first row)))
                 (setf (select output-matrix  (first row) j) 
                       (* -1 (select output-matrix  (first row) j))))
             )
    (list output-matrix row))
  )






(defun my-border-matrix (a b c d)
  "Args: (A B C D) A: a matrix to bordered. B: a list that will be the first column of new matrix. C: a list that will be the first row of the new matrix. D: a value that will be the first element in the new matrix. This is modified from Tierney's p. 165."
  (bind-rows 
   (concatenate 'list  (list d) c)
   (bind-columns b a)
   ))




(defun impute (output-em data)
  "Args: (OUTPUT-EM DATA ). 
OUTPUT-EM: The output of the EM function. 
DATA: are the matrix of data to be imputed. "
  (let* (
         (means-em (first output-em))
         ;(variances-em (second output-em))
         
         (Parametters (third output-em))
         (output-tobs (fourth output-em))
         (s (length (select output-tobs 1)))
         
         (p (- (array-dimension parametters 0) 1))       
         (rows-in-patterns (select output-tobs 1))
         (c (make-array (list p 1)
                        :initial-element 0))
         (n-cases (array-dimension (select output-tobs 3) 0))
         (data data)
         (data-random (copy-array data))
         
         (r (make-array (list s p) :initial-contents (select output-tobs 4)))
         (patterns (select output-tobs 4));esto deberia quitarlo esta repetido 
         (mean-c-error (compute-mean-c-error parametters s rows-in-patterns data r p patterns))      
         )
    

    (dotimes (i s) 
             (dotimes (j p)

                      (if (and (equalp (aref r i j) 1) 
                               (> (aref parametters (+ 1 j) (+ 1 j)) 0))
                       
                          (setf parametters 
                                (select 
                                 (schafer-sweep-operator 
                                  parametters (list (+ 1 j))) 0))
                          )                           
                      (if  (and (equalp (aref r i j) 0) 
                                (< (aref parametters (+ 1 j) (+ 1 j)) 0))                              
                           (setf parametters 
                                 (select  
                                  (reverse-schafer-sweep-operator 
                                   parametters (list (+ 1 j))) 
                                  0))
                           ))
                             

             (dolist (m (select rows-in-patterns i))                            
                   (dolist (n (missing-in-missing-pattern patterns i))
                                   (setf (select data m n)
                                         (sum (select parametters 0 (+ 1 n))          
                                              (* (select parametters (+ 1 n)      
                                                         (+ 1 
                                                            (observed-in-missing-pattern 
                                                             patterns i))) 
                                                 (select data m 
                                                         (observed-in-missing-pattern
                                                          patterns i)))))
                                    (setf (select data-random m n) 
                                          (+ (select data m n)          
                                             (* (select mean-c-error n) 
                                                (first (normal-rand 1)))))
                                   ))
             )
    
             (list data data-random))
    ) 
                            
                      



;este parece el ms rpido
(defun loglikelihood (p s r patterns parametters rows-in-patterns data observed-in-missing-pattern-list)
"Computes the loglikelihood of the result. Unused."
  (let* (
         (d 0)
         (l 0)
         (p p)
         (number-of-patterns s)
         (patterns r)
         (parametters parametters)
         
         (rows-in-patterns rows-in-patterns)
         (c (make-array (list p 1)
                        :initial-element 0))
         (data data)
         (M (make-array (array-dimensions parametters)
                       :initial-element 0)) 
         (tx 0)
         (patterns patterns)
         
         (observed-in-missing-pattern-list observed-in-missing-pattern-list)
         ;(parametters2 (copy-array parametters))
         (datainrows nil)
         (diff nil)
         (invparam nil)
         
         
         )
    
    (setf (select c (iseq 0 (- p 1)) 0) (select parametters  (+ 1 (iseq 0 (- p 1))) 0))
    
    (dotimes (i number-of-patterns)
             (when (select observed-in-missing-pattern-list i)
                   (setf d (log (determinant  (select parametters
 
                                                      (+ 1 (select observed-in-missing-pattern-list i))
                                                      (+ 1 (select observed-in-missing-pattern-list i))
                                                      )))) 
                   
                   (setf invparam (* (inverse  (select parametters
 
                                                       (+ 1 (select observed-in-missing-pattern-list i))
                                                       (+ 1 (select observed-in-missing-pattern-list i))
                                                       )) -1))
            
                   
                   (setf datainrows 
                         (select data 
                                 (select rows-in-patterns i) 
                                 (select observed-in-missing-pattern-list i)))
                   (setf diff  (- (column-list datainrows) (select c (select observed-in-missing-pattern-list i) 0)))
                   
                   (setf diff (make-array (array-dimensions datainrows) :initial-contents 
                                          (transpose (map-elements #'coerce diff 'list)))) ;el transpose es necesario para que los datos entren correctamente en la matriz. De todos modos quiero revisarlo algun dia porque no lo veo muy claro
                    
                    
             (setf l (- l (/ (+ (* (length (select rows-in-patterns i)) d) 
                                (* -1 (reduce #'+ (diagonal (matmult invparam (cross-product diff)))))) 2 )))
             
        
             ))
    l
    )
  )



